#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long;
use POSIX qw(strftime);
use FindBin qw($RealBin);
use lib "$FindBin::RealBin/source";
use CF::Constants;
use CF::Helpers;
use CF::Headnodehelpers;

no warnings qw(once);

##########################################################################
# Copyright 2014, Philip Ewels (phil.ewels@scilifelab.se)                #
#                                                                        #
# This file is part of Cluster Flow.                                     #
#                                                                        #
# Cluster Flow is free software: you can redistribute it and/or modify   #
# it under the terms of the GNU General Public License as published by   #
# the Free Software Foundation, either version 3 of the License, or      #
# (at your option) any later version.                                    #
#                                                                        #
# Cluster Flow is distributed in the hope that it will be useful,        #
# but WITHOUT ANY WARRANTY; without even the implied warranty of         #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
# GNU General Public License for more details.                           #
#                                                                        #
# You should have received a copy of the GNU General Public License      #
# along with Cluster Flow.  If not, see <http://www.gnu.org/licenses/>.  #
##########################################################################

my $CF_VERSION = $CF::Constants::CF_VERSION;
my $homedir = $ENV{"HOME"};
my %config = %CF::Constants::config;
my @pipeline_folders = ('./', "$homedir/.clusterflow/pipelines/", "$RealBin/pipelines/");
my @module_folders = ('./', "$homedir/.clusterflow/modules/", "$RealBin/modules/");
my $pipeline;

# Get command line parameters
# These are fetched from the config, but can be overwritten on the cl
my $SPLIT_FILES = $CF::Constants::SPLIT_FILES;
my $MAX_RUNS = $CF::Constants::MAX_RUNS;
my $TIME_MULTIPLIER = $CF::Constants::TIME_MULTIPLIER;
my $CLUSTER_ENVIRONMENT = $CF::Constants::CLUSTER_ENVIRONMENT;
my $CLUSTER_MEM_PER_CPU = $CF::Constants::CLUSTER_MEM_PER_CPU;
my $PRIORITY = $CF::Constants::PRIORITY;
my $C_PROJECT = $CF::Constants::C_PROJECT;
my $TOTAL_CORES = $CF::Constants::TOTAL_CORES;
my $TOTAL_MEM = $CF::Constants::TOTAL_MEM;
my $EMAIL = $CF::Constants::EMAIL;
# These are only set on the cl
my $GENOME;
my $cl_reference;
my $cl_paired_end;
my $cl_single_end;
my $cl_no_fn_check;
my $file_list;
my $cl_runfile_prefix = "";
my $cl_params = '';
my $cl_notifications;
my $cl_list_pipelines;
my $cl_list_modules;
my $cl_list_genomes;
my $cl_merge;
my $cl_dryrun;
my $cl_qstat;
my $cl_qstatall;
my $cl_qdel;
my $cl_setup;
my $cl_add_genome;
my $cl_version;
my $cl_check_updates;
my $cl_qname;
my $cl_help;

my $config_result = GetOptions(
	"genome=s" => \$GENOME,
 	"ref=s" => \$cl_reference,
	"split_files=i" => \$SPLIT_FILES,
	"max_runs=i" => \$MAX_RUNS,
	"merge=s" => \$cl_merge,
	"paired" => \$cl_paired_end,
	"single" => \$cl_single_end,
	"no_fn_check" => \$cl_no_fn_check,
	"file_list=s" => \$file_list,
	"runfile_prefix=s" => \$cl_runfile_prefix,
	"params=s" => \$cl_params,
	"email=s" => \$EMAIL,
	"priority=i" => \$PRIORITY,
	"project=s" => \$C_PROJECT,
	"qname=s" => \$cl_qname,
	"cores=i" => \$TOTAL_CORES,
	"mem=s" => \$TOTAL_MEM,
	"time=f" => \$TIME_MULTIPLIER,
	"environment=s" => \$CLUSTER_ENVIRONMENT,
	"notifications=s"  => \$cl_notifications,
	"pipelines" => \$cl_list_pipelines,
	"modules" => \$cl_list_modules,
	"genomes" => \$cl_list_genomes,
	"dry_run" => \$cl_dryrun,
	"qstat" => \$cl_qstat,
	"qstatall" => \$cl_qstatall,
	"qdel=s" => \$cl_qdel,
	"setup" => \$cl_setup,
	"add_genome" => \$cl_add_genome,
	"version" => \$cl_version,
	"check_updates" => \$cl_check_updates,
	"help" => \$cl_help
);

if(!$config_result){
	die "Error! could not parse command line options.. For help, run cf --help\n";
}

# Get the pipeline and file list
$pipeline = shift(@ARGV);
my @files = @ARGV;

# Set up parameters
my $CL_COLOURS = $CF::Constants::CL_COLOURS;
my $CHECK_UPDATES = $CF::Constants::CHECK_UPDATES;
my @NOTIFICATIONS = @CF::Constants::NOTIFICATIONS;
my @MERGE_REGEXES = @CF::Constants::MERGE_REGEXES;
my $CUSTOM_JOB_SUBMIT_COMMAND = $CF::Constants::CUSTOM_JOB_SUBMIT_COMMAND;
my $JOB_SUBMIT_ENV = $CF::Constants::JOB_SUBMIT_ENV;
my $PROJECT_ID = $CF::Constants::PROJECT_ID;
my $JOB_TIMELIMIT = $CF::Constants::JOB_TIMELIMIT;
my $CF_MODULES = $CF::Constants::CF_MODULES;
my %ENV_MODULE_ALIASES = %CF::Constants::ENV_MODULE_ALIASES;
my @LOG_HIGHLIGHT_STRINGS = @CF::Constants::LOG_HIGHLIGHT_STRINGS;
my @LOG_WARNING_STRINGS = @CF::Constants::LOG_WARNING_STRINGS;
my %LOADED_MODULES = %CF::Constants::LOADED_MODULES;

# Set up genomes
my %REFERENCES = %CF::Constants::REFERENCES;
if($cl_reference){
    my ($type, $path) = split('=', $cl_reference, 2);
	$REFERENCES{$type}{"command_line"} = $path;
	$GENOME = "command_line";
}

# Holders for tracking job IDs and loaded modules
my %JOB_NUM_IDS;

if($cl_notifications){
	my @new_nots = split(//, $cl_notifications);
	@NOTIFICATIONS = ();
	foreach my $not (@new_nots){
		push (@NOTIFICATIONS, 'complete') if($not eq 'c');
		push (@NOTIFICATIONS, 'run') if($not eq 'r');
		push (@NOTIFICATIONS, 'success') if($not eq 's');
		push (@NOTIFICATIONS, 'error') if($not eq 'e');
		push (@NOTIFICATIONS, 'abort') if($not eq 'a');
	}
}

# Printing command line options
if($cl_list_pipelines){
	print "".("=" x 32)."\nCluster Flow - available pipelines\n".("=" x 32)."\n";
	print "Installed pipelines:\n";
	foreach my $folder (@pipeline_folders){
		if(-e $folder){
			print "    Directory $folder\n";
			opendir (DIR, $folder) or die $!;
			my @dir_files = sort readdir(DIR);
			while ( my $file = shift @dir_files ) {
				if(substr($file, -7) eq ".config" && substr($file, 0, -7) ne "clusterflow"){
					print "\t- ".substr($file, 0, -7)."\n";
				}
			}
			closedir(DIR);
		} else {
			print "    Directory $folder (not found)\n";
		}
	}
	print "\n";
	exit;
}
if($cl_list_modules){
	print "".("=" x 32)."\nCluster Flow - available modules\n".("=" x 32)."\n";
	print "Available modules:\n";
	foreach my $folder (@module_folders){
		if(-e $folder){
			print "    Directory $folder\n";
			opendir (DIR, $folder) or die $!;
			my @dir_files = sort readdir(DIR);
			while ( my $file = shift @dir_files ) {
				# Remove file extension if it's not .cfmod
				if($file !~ /\.cfmod$/){
					$file =~ s/\.[^.]+$//;
				}
				if($file =~ /\.cfmod$/){
					print "\t- ".substr($file, 0, -6)."\n";
				}
			}
			closedir(DIR);
		} else {
			print "    Directory $folder (not found)\n";
		}
	}
	print "\n";
	exit;
}
if($cl_list_genomes){
	print "".("=" x 32)."\nCluster Flow - available genomes\n".("=" x 32)."\n";
	print CF::Constants::list_clusterflow_genomes();
	exit;
}
if($cl_dryrun){
	print "\n### Cluster Flow is running in Dry Run mode. No cluster jobs will be set off, only printed. Run files will be created. ###\n\n";
	sleep(1);
}
if($cl_qstat){
	if($CLUSTER_ENVIRONMENT =~ /GRIDEngine/i){
		if($CL_COLOURS){
			print CF::Headnodehelpers::parse_qstat(0, 1);
		} else {
			print CF::Headnodehelpers::parse_qstat(0, 0);
		}
	} elsif($CLUSTER_ENVIRONMENT =~ /SLURM/i){
		if($CL_COLOURS){
                print CF::Headnodehelpers::parse_squeue(0, 1);
        } else {
                print CF::Headnodehelpers::parse_squeue(0, 0);
        }
	} elsif($CLUSTER_ENVIRONMENT =~ /local/i){
		if($CL_COLOURS){
                print CF::Headnodehelpers::parse_localjobs(0, 1);
        } else {
                print CF::Headnodehelpers::parse_localjobs(0, 0);
        }
	} else {
		print "Apologies, this function only works for GRIDEngine, SLURM and loca systems.\n";
	}
	exit;
}
if($cl_qstatall){
	unless($CLUSTER_ENVIRONMENT eq 'GRIDEngine'){
		print "Apologies, this function is not yet supported for systems other than GRIDEngine.\n";
		exit;
	}
	if($CL_COLOURS){
		print CF::Headnodehelpers::parse_qstat(1, 1);
	} else {
		print CF::Headnodehelpers::parse_qstat(1, 0);
	}
	exit;
}
if($cl_qdel){
	if($CLUSTER_ENVIRONMENT =~ /GRIDEngine/i){
		print "\nDeleting jobs from pipeline: $cl_qdel\n\n";
		print CF::Headnodehelpers::cf_pipeline_qdel($cl_qdel);
		exit;
	} elsif($CLUSTER_ENVIRONMENT =~ /SLURM/i){
		print "\nDeleting jobs from pipeline: $cl_qdel\n\n";
		print CF::Headnodehelpers::cf_pipeline_scancel($cl_qdel);
		exit;
	} elsif($CLUSTER_ENVIRONMENT =~ /local/i){
		print "\nDeleting jobs from pipeline: $cl_qdel\n\n";
		print CF::Headnodehelpers::cf_pipeline_localkill($cl_qdel);
		exit;
	} else {
		print "Apologies, this function only works for GRIDEngine, SLURM and loca systems.\n";
		exit;
	}
}

if($cl_setup){
	CF::Constants::clusterflow_setup();
	exit;
} else {
	# Warn user if we don't have a home directory config file
	# (probably means no notifications)
	unless(-e "$homedir/.clusterflow/clusterflow.config"){
		warn "".('='x55)."\nYou don't seem to have a config file set up in your
home directory! Cluster Flow may not work without it.
Please run 'cf --setup' before continuing\n".('='x55)."\n\n\n";
	}
}
if($cl_add_genome){
	CF::Constants::clusterflow_add_genome();
	exit;
}

if($cl_version){
	print "Cluster Flow v$CF_VERSION\n\n";
	exit;
}

# Check for updates - either by command line or config file
if($cl_check_updates){
	print "\nCurrent Cluster Flow version: v$CF_VERSION\n\n";
	my ($message, $update_available) = CF::Headnodehelpers::cf_check_updates($CF_VERSION);
	print "$message\n";
	exit;
} elsif($CHECK_UPDATES){
	my ($message, $update_available) = CF::Headnodehelpers::cf_check_updates($CF_VERSION);
	if ($update_available){
		warn $message;
	}
}


if($cl_help){
	if($pipeline){
		print "\n".CF::Constants::clusterflow_pipeline_help($pipeline)."\n";
	} else {
		print CF::Constants::clusterflow_help();
	}
	exit;
}

# Check that we have everything we need
my @valid_environments = ('local', 'GRIDEngine', 'SLURM', 'LSF');
my $environtments_regex = join('|', @valid_environments);
if(length($CLUSTER_ENVIRONMENT) == 0){
	my $global_fn = "$FindBin::RealBin/clusterflow.config";
	if(-e $global_fn){
		die ("Error - Cluster Environment not set.\nPlease configure \@cluster_environment in your global config file:\n$global_fn\n\n");
	} else {
		die ("Error - Cluster Environment not set. Please run 'cf --setup' to create a global config file.\n\n");
	}
} elsif($CLUSTER_ENVIRONMENT !~ /^($environtments_regex)$/i){
	die ("Error - Cluster Environment ($CLUSTER_ENVIRONMENT) not recognised. Valid values: ".join(', ', @valid_environments)."\n\n");
}
if(!$pipeline){
	die ("Error - no pipeline specified. Use --help for instructions.\nSyntax: cf [flags] pipeline_name file_1 file_2..\n\n");
}

if(!$file_list && scalar(@files) == 0){
	die ("Error - no input files specified. Use --help for instructions.\nSyntax: cf [flags] pipeline_name file_1 file_2..\n\n");
}

# If we're using environment modules, check we can find modulecmd (system returns 1 on error)

my $modulecmd = 'modulecmd';

if($CF_MODULES){

	# Check for Lmod
	if (exists $ENV{LMOD_CMD}) {
		$modulecmd = $ENV{LMOD_CMD};
	}
	if (system("which $modulecmd > /dev/null 2>&1")){
		die( "ERROR - could not find $modulecmd on the PATH. Aborting run.\n" .
		"If you don't use environment modules, please set \@ignore_modules\n" .
		"in your Cluster Flow configuration file.\n\n");
	}
}

# Load in the pipeline config file
my $config_found = 0;
foreach my $folder (@pipeline_folders){
	if(-e $folder."$pipeline.config"){

		# Open the pipeline config file handle
		open (CONFIG,$folder."$pipeline.config") or die "Can't read ".$folder."$pipeline.config: $!";

		$config_found = 1;
		last;
	}
}

# No pipeline found - was a single module specified instead?
if(!$config_found){
	my $mod_prefix = '#';
	if($pipeline =~ /^([#>])/){
		$mod_prefix = $1;
		$pipeline =~ s/^([#>])//;
	}
	foreach my $folder (@module_folders){
		my @mod_files = glob $folder.$pipeline.'.cfmod*';
		if(scalar @mod_files == 1){
			# Make a pseudo pipeline config file handle
			my $pipeline_config = "\n".$mod_prefix.$pipeline."\n\n";
			open(CONFIG, "<", \$pipeline_config) or die "Can't open variable file handle for single module pipeline: $!\n\n";

			$config_found = 1;
			last;
		} elsif(scalar @mod_files > 1){
			die "Found more than one module file matching pipeline id:\n - ".join("\n - ", @mod_files)."\n\n";
		}
	}
}
# No pipeline or module found - die with error
if(!$config_found){
	die ("Error! Can't find pipeline or module called $pipeline\n");
}


# Print welcome message
my $cfid = $pipeline.'_'.time;
print "="x50, "\n Cluster Flow v".$CF_VERSION."\n Launching pipeline $cfid\n", "="x50, "\n\n";


# If we have a file list input file, take input from there
my %download_fns;
if($file_list){
	unless(-e $file_list){
		die ("Error - file list not found: $file_list\nExiting...\n\n");
	}
	@files = ();
	open (FILES, $file_list) or die "Can't read $file_list: $!";
	while (<FILES>) {
		chomp;
		s/[\n\r]//;
		if ($_ =~ /^$/) { next; } # Skip blank lines
		my @sections = split(/\s+/);
		push @files, $sections[0];
		if(defined($sections[1])){
			$download_fns{$sections[0]} = $sections[1];
		}
	}
	close(FILES);
}
my $num_files = scalar @files;


# Check file types
my $file_ext;
my $fastq_count = 0;
for (my $i = 0; $i <= $#files; $i++){
	$fastq_count++ if ($files[$i] =~ /f(ast)?q(\.gz)?$/i);
	if(!$cl_no_fn_check){
		if ($files[$i] =~ /\.(\w+)(\.gz)?$/i){
			if($file_ext && $file_ext ne $1){
				die "ERROR - found a mixture of input file types! (.$file_ext and .$1) Exiting..\nUse --no_fn_check to disable this.\n\n";
			}
			$file_ext = $1;
		}
	}
}

# Auto-detect paired end files for fastq input
if(!$cl_paired_end && !$cl_single_end && $fastq_count > 1){
	my $paired_count = 0;
	my $single_count = 0;
	for (my $i = 0; $i <= $#files; $i++){
		if($i < $#files){
			# Make stripped copies of the fns for comparison
			(my $fn1 = $files[$i]) =~ s/_R?[1-4]//g;
			(my $fn2 = $files[$i+1]) =~ s/_R?[1-4]//g;
			if($fn1 eq $fn2){
				$paired_count++;
				$i++;
			} else {
				$single_count++;
			}
		}
	}

	if($paired_count > 0 && $single_count == 0 && $num_files % 2 == 0){
		$cl_paired_end = 1;
		print "Filenames look like paired-end input. Setting --paired\n";
		print "Specify --single to prevent this behaviour.\n\n";
	} elsif($paired_count == 0 && $single_count > 0){
		$cl_single_end = 1;
		print "Filenames look like single-end input. Setting --single\n";
		print "Specify --paired to prevent this behaviour.\n\n";
	} else {
		die "ERROR - found a mixture of single end and paired end files! Exiting..\nSpecify --single or --paired to override.\n\n";
	}
}

#
# MERGE REGEXES
#

# Make an array of file groups for each parallel run
my %file_sets_hash;
if($cl_merge){
	@MERGE_REGEXES = ($cl_merge);
}
if(@MERGE_REGEXES){

	# Loop through each file and add to hash
	foreach my $fn (@files){
		my $group = $fn;
		
		# Loop through regexes and update group if we have a match
		foreach my $merge (@MERGE_REGEXES){
			if(my @captured = $fn =~ m/$merge/){
				my $groupstr = '';
				# Loop through each regex match - $1, $2 and so on.
				for (0 .. $#captured){
					$groupstr .= $captured[$_];
				}
				$group = $groupstr if(length($groupstr) > 0);
			}
		}
		
		# Group doesn't exist yet - add new key to hash
		if(!defined($file_sets_hash{$group})){
			$file_sets_hash{$group} = ();
		}
		push(@{$file_sets_hash{$group}}, $fn);
	}
}
# No regex - make the hash with keys as the filenames
else {
	foreach my $fn (@files){
		$file_sets_hash{$fn} = ();
		push(@{$file_sets_hash{$fn}}, $fn);
	}
}
my $num_file_groups = scalar keys %file_sets_hash;

# Did we find any files to merge?
if($num_file_groups < $num_files){
	print "Found $num_file_groups groups of files from $num_files using merge regexes:\n".join("\n", @MERGE_REGEXES)."\n\n";
} elsif(@MERGE_REGEXES){
	print "No file groups found with merge regexes:\n".join("\n", @MERGE_REGEXES)."\n\n";
	@MERGE_REGEXES = ();
}


#
# MAX RUNS and SPLIT FILES
#
# Is split_files configured? Disable max_runs if so
if($SPLIT_FILES > 1){
	$MAX_RUNS = 0;
}
# Forcing paired end or single end
if ($cl_paired_end){
	if($SPLIT_FILES == 1){
		$SPLIT_FILES = 2;
	}
}
# Set split_files now that we know how many files we have
if($MAX_RUNS > 0){
	$SPLIT_FILES = int(($num_file_groups / $MAX_RUNS) + 0.99); # + 0.99 is to do a ceiling round
	if ($cl_paired_end && ($SPLIT_FILES % 2 == 1)){
		$SPLIT_FILES++;
	}
}

# Convert the file sets hash into an array, bundling if we need to
my @file_sets;
if($SPLIT_FILES > 1){
	my @thisgroup;
	my $thisgroupcounter = 0;
	foreach my $group (sort keys %file_sets_hash) {
		if($thisgroupcounter < $SPLIT_FILES){
			push (@thisgroup, @{$file_sets_hash{$group}});
			$thisgroupcounter++;
		} else {
			push (@file_sets, [@thisgroup]);
			@thisgroup = @{$file_sets_hash{$group}};
			$thisgroupcounter = 1;
		}
	}
	if(scalar @thisgroup > 0){
		push (@file_sets, [@thisgroup]);
	}
}
# One file group per run - just convert the hash to an array
else {
	push (@file_sets, values(%file_sets_hash));
}

my $num_runs = scalar @file_sets;
my $runs = $num_runs == 1 ? 'run' : 'runs';
print "Processing $num_files files in $num_runs $runs.\n\n";


## Write the run file, parse the pipeline
my $runfile;
my %module_tree;
my @indents;
push @indents, \%module_tree;
my @summary_modules;
my $comment_block = 0;

# Write config variables to runfile header
$runfile = <<"EOT";
\@split_files	$SPLIT_FILES
\@total_cores	$TOTAL_CORES
\@total_mem	$TOTAL_MEM
EOT
if($EMAIL){
	$runfile .= "\@email	$EMAIL\n";
}
if($PRIORITY){
	$runfile .= "\@priority	$PRIORITY\n";
}
foreach my $not (@NOTIFICATIONS){
	$runfile .= "\@notification\t$not\n";
}

if(defined($GENOME)){
    my $refs_count = 0;
	$runfile .= "\@genome\t$GENOME\n";
    foreach my $ref_type ( keys %REFERENCES){
        if(defined($REFERENCES{$ref_type}{$GENOME})){
            $runfile .= "\@reference\t$ref_type\t$REFERENCES{$ref_type}{$GENOME}{path}\n";
            $refs_count++;
        }
    }
    if($refs_count == 0){
        die("### Error ###\n\nNo genome references found for genome key \"$GENOME\"..\n\n".
            "Use cf --genomes to see available references and keys.\n\n".
            "Exiting...\n\n");
    }
}
if ($cl_paired_end){
	$runfile .= "\@force_paired_end\n";
} elsif ($cl_single_end){
	$runfile .= "\@force_single_end\n";
}
if (@MERGE_REGEXES){
	foreach my $merge (@MERGE_REGEXES){
		$runfile .= "\@merge_regex\t$merge\n";
	}
}

# Parse pipeline config file
my $pipeline_string = "";
if(@MERGE_REGEXES){
	$pipeline_string .= "#cf_merge_files\n";
	splice @indents, 1;
	push @indents, $indents[$#indents]->{'cf_merge_files'} = {};
}
my $added_summary_mod = 0;
while (<CONFIG>){
	chomp;
	s/\n//;
	s/\r//;

	# Ignore comment blocks
	if($_ =~ /^\/\*/){
		$runfile .= "$_\n";
		$comment_block = 1;
		next;
	}
	if($_ =~ /^\*\//){
		$runfile .= "$_\n";
		$comment_block = 0;
		next;
	}

	# Require python package
	if($_ =~ /^\@require_python_package/ && !$comment_block){
		my $package = (split(/\s+/, $_, 2))[1];
		my $try_import = `python -c "import $package" 2>&1`;
		if(length($try_import) > 0){
			die "\n### Error - The pipeline $pipeline requires the '$package' python package to be installed. Exiting... ### \n\n";
		}
	}

	# Read the pipeline tree
	elsif($_ =~ /^(\t*)#/ && !$comment_block){

        # Check that we haven't already seen a summary module
        if($added_summary_mod){
            die "Error! Normal modules (beginning with #) are not allowed after\nsummary modules (beginning with >) in pipelines.\n\n";
        }

		# Add in any command line parameters to each module
		if($cl_params){
			$_ .= " $cl_params";
		}

		$_ = "\t".$_ if(@MERGE_REGEXES); # Add a tab if we prepended merge
		$pipeline_string .= "$_\n";
		$runfile .= "$_\n";

		$_ =~ s/^(\t*)#//; # Remove hash symbol and count tabs
		splice @indents, length($1)+1;
		push @indents, $indents[$#indents]->{$_} = {};
	}

    # Read the summary jobs
    elsif($_ =~ /^\s*>/ && !$comment_block){

		# Add in any command line parameters
		if($cl_params){
			$_ .= " $cl_params";
		}

		$pipeline_string .= "\t" if(@MERGE_REGEXES); # Add a tab if we prepended merge
		$pipeline_string .= "$_\n";
		$runfile .= "$_\n";

        $_ =~ s/^\s*>|\s+$//g; # Remove gt symbol and trim whitespace
        push (@summary_modules, $_);
        $added_summary_mod = 1;
    }

	# Add everything else to runfile string
	else {
		$runfile .= "$_\n";
	}

}

close(CONFIG);

print "Pipeline to be used:\n\n$pipeline_string\n\n";


# Work out quota of cores and memory allowed per job
$TOTAL_MEM = CF::Helpers::human_readable_to_bytes($TOTAL_MEM);
# Count terminal leaves in pipeline and number of files
my ($num_leaves, $num_jobs) = count_leaves(\%module_tree, 0, 0);
my $num_parallel = $num_leaves * $num_runs;
# Calculate per-job resources
my $cores_allocation = int( $TOTAL_CORES / $num_parallel);
my $memory_allocation = int( $TOTAL_MEM / $num_parallel);
# Sanity checks
if($cores_allocation < 1){
	$cores_allocation = 1; # minimum one core
}
if($memory_allocation < 104857600){
	$memory_allocation = 104857600; # minimum one 100 Mb
}
my $memory_allocation_hr = CF::Helpers::bytes_to_human_readable($memory_allocation);


sub count_leaves {
	# Set up parameters
	my ($mod_tree, $num_leaves, $num_jobs) = @_;

	# Increase job counter
	$num_jobs++;

	# Run through leaves on this branch
	foreach ( keys %{$mod_tree} ){

		my $num_keys = scalar keys(%{$mod_tree->{$_}});

		# Recursively call this function if we're not at the end of a branch
		if ($num_keys > 0) {
			($num_leaves, $num_jobs) = count_leaves ( $mod_tree->{$_}, $num_leaves, $num_jobs ) ;

		# No children, add one to the count for this leaf
		} else {
			$num_leaves++;
		}
	}

	return ($num_leaves, $num_jobs);
}

# Get ready to save the STDOUT from the job submissions
my $jid_base = 'cf_'.$cfid.'_';
my $submit_log = $jid_base.'submissionlog.txt';
if($CLUSTER_ENVIRONMENT =~ /local/i){
    $submit_log = '/dev/null';
}
if($cl_dryrun){
		*SUBMIT = *STDOUT;
} else {
    open (SUBMIT,'>>',$submit_log) or die "Can't write to $submit_log: $!";
    print SUBMIT "Output from $CLUSTER_ENVIRONMENT when submitting Cluster Flow jobs:\n".("="x52)."\n\n";
}


#
# START THE PIPELINE
#

my @qsubs;
my @job_ids;
my %run_job_ids;
my $job_id;
my $prev_dl_id = "";
my @runfns;
my @outfns;
my @finished_run_ids;
# Define these here as we need to remember them after the loop finishes
my $runfn;
my $run_finish_id;
my $outfn;
print "Processing files (one dot per file):\n";
foreach my $file_set (@file_sets){

	# References give me headaches
	my @files = @{$file_set};

	# Log file name
	$outfn = $files[0]."_".$pipeline."_clusterFlow.txt";

	# Make the run file
	$runfn = $cl_runfile_prefix.$files[0]."_$pipeline.run";
	my $date = strftime "%H:%M, %d-%m-%Y", localtime;
	my $this_runfile = "/*\nCluster Flow Run File\nPipeline: $pipeline\nPipeline ID: cf_$cfid\nCreated at $date\n*/\n\n".$runfile."\n\n";

	# Loop through each file in file group
	foreach my $fn (@files){

		# Write status update
		print ".";

		# Is this filename a URL?
		if ($fn =~ /^(((ht|f)tp(s?))\:\/\/)/){

			# Find download filename if it exists
			my $dl_fn;
			if($download_fns{$fn}){
				$dl_fn = $download_fns{$fn};
			} else {
				my @parts = split("/", $fn);
				$dl_fn = pop(@parts);
			}

			# Only do this the first time, else we create a new file for each download when using --split_files
			if ($runfn =~ /^(((ht|f)tp(s?))\:\/\/)/){
				$runfn = $dl_fn."_$pipeline.run";
				$outfn = $dl_fn."_".$pipeline."_clusterFlow.txt";
			}

			push @outfns, $outfn;

			# URL - set up download module qsub job, dependent on previous download job
			$job_id = $jid_base.'download_'.sprintf("%03d", rand(999));
			my $dlcmd = "$RealBin/modules/cf_download.cfmod.pl --run_fn $runfn --job_id $job_id --prev_job_id null --cores 1 --mem 1G --param url=$fn --param dl_fn=$dl_fn";
			my @dlhjid = ();
			if(length($prev_dl_id) > 0){
				@dlhjid = ($prev_dl_id);
			}
			my $qsub = make_submit_cmd($dlcmd, $job_id, \@dlhjid, $prev_dl_id, '1', '1G', '2-00:00:00', $outfn);

			push @qsubs, $qsub;
			push @job_ids, $job_id;
			$prev_dl_id = $job_id;

			# The download module ignores this, but good to add so that mods know how many files we're starting with
			$this_runfile .= "start_000\t$dl_fn\n";

		} elsif(-e $fn) { # Not a URL. Does this file exist?
			# Add the starting filename to the run file
			$job_id = 'start_000';
			$this_runfile .= "$job_id\t$fn\n";
			push @outfns, $outfn;

		} else { # Not a URL, file doesn't exist. Something is wrong!
			die "\nFile $fn doesn't exist.. Something is wrong! Exiting...\n\n";
		}
	}

	# Deduplicate output fn array
	@outfns = do { my %h; @h{@outfns} = @outfns; values %h };

	# Write out the run file
	open (RUNOUT,'>',$runfn) or die "Can't write to $runfn: $!";
	print RUNOUT $this_runfile;
	close(RUNOUT);
    push (@runfns, $runfn);

	# Make up qsub jobs
	make_qsubs (\%module_tree, $job_id, $runfn, $outfn);

	# Qsub job to execute on completion of this run
	$run_finish_id = $jid_base."email_run_complete_".sprintf("%03d", rand(999));
	my $run_finish_cmd = "$RealBin/modules/cf_run_finished.cfmod.pl --run_fn $runfn --job_id $run_finish_id --prev_job_id null --cores 1 --mem 4G --param hide_log_header=true --param outfn=$outfn";
	my $run_finish_qsub = make_submit_cmd($run_finish_cmd, $run_finish_id, \@{$run_job_ids{$runfn}}, 0, '1', '4G', '12:00', $outfn, 1);

	push @qsubs, $run_finish_qsub;
	push @finished_run_ids, $run_finish_id;

}

#
# FINAL JOBS to execute on completion of ALL JOBS (entire pipeline)
#

# Summary Modules
for my $s_mod (@summary_modules){

	# Set up parameters for these leaves
	my ($s_module, $parameter_string) = split(/\s/, $s_mod, 2);
	my $s_job_id = $jid_base.$s_module.'_'.sprintf("%03d", rand(999));

	my $parameters = '';
	if($parameter_string){
		my @params = split(/\s/, $parameter_string);
		my $parameters = "--param ".join(' --param ', @params);
	}

	if(length($s_module) > 0){
        # Get module filename and requirements
        my $s_module_fn = get_mod_fn($s_module);
        my ($s_cores, $s_mem, $s_time) = get_mod_requirements($s_module_fn, $runfn);

		# formulate qsub command
		my $s_cmd = "$s_module_fn --job_id $s_job_id --prev_job_id $run_finish_id --cores $s_cores --mem $s_mem $parameters --param summary_module=true";
		foreach my $rfn (@runfns){
			$s_cmd .= " --run_fn $rfn";
		}

		my $s_qsub = make_submit_cmd($s_cmd, $s_job_id, \@finished_run_ids, $run_finish_id, $s_cores, $s_mem, $s_time, $outfn, 1);
        push (@qsubs, $s_qsub);
        push (@finished_run_ids, $s_job_id);
    }
}

# Cluster Flow module to send pipeline completion e-mail
my $all_runs_finish_id = $jid_base."email_pipeline_complete_".sprintf("%03d", rand(999));
my $all_runs_finish_cmd = "$RealBin/modules/cf_runs_all_finished.cfmod.pl --run_fn $runfn --job_id $all_runs_finish_id --prev_job_id $run_finish_id --cores 1 --mem 4G --param summary_module=true --param hide_log_header=true";
for my $i (0 .. $#outfns) {
	$all_runs_finish_cmd .= " --param outfn_".$i."=".$outfns[$i];
}

my $all_runs_finish_qsub = make_submit_cmd($all_runs_finish_cmd, $all_runs_finish_id, \@finished_run_ids, 0, '1', '4G', '12:00', $outfn, 1);

push @qsubs, $all_runs_finish_qsub;

# Write status update
print "\nFinished processing files.\n";





########################################################
# Recursive function to create and submit cluster jobs
########################################################
# Find the module file
sub get_mod_fn {
    my ($module) = @_;
    my $module_fn;
    foreach my $folder (@module_folders){

    	my @mod_files = glob $folder.$module.'.cfmod*';
		@mod_files = grep { $_ !~ /\.pyc$/ } @mod_files; # Remove ignored filetypes

    	if(scalar @mod_files == 1){
    		$module_fn = $mod_files[0];
    		last;
    	} elsif(scalar @mod_files > 1){
    		die "Found more than one module file matching module id:\n - ".join("\n - ", @mod_files)."\n\n";
    	}
    }
    if(!$module_fn){
    	die "\nError! Can't find module file $module.cfmod*";
    }
    unless(-x $module_fn){
    	die "\nError! Can't execute module file $module_fn\n";
    }
    return $module_fn;
}

sub get_mod_requirements {
    my ($module_fn, $runfn) = @_;

	# Send query to module
	my $req_cmd .= "$module_fn --requirements --run_fn $runfn --cores $cores_allocation --mem $memory_allocation";
	my $response = `$req_cmd 2> /dev/null`;

	# Parse response
	my $cores = '';
	my $mem = '';
	my $time = '';
	foreach my $ln (split("\n", $response)){
		chomp($ln);
		my ($key, $val) = split(':', $ln, 2);
		if($key and $val){
			# trim whitespace
			$key =~ s/^\s+|\s+$//g;
			$val =~ s/^\s+|\s+$//g;

			if($key eq 'cores'){
				$cores = $val;
				$cores =~ s/\D//g;
			}
			if($key eq 'memory'){
				$mem = $val;
			}
			if($key eq 'modules'){
				my @modules = split(/[\s,]+/, $val);
				&CF::Helpers::load_environment_modules(\@modules, \%LOADED_MODULES);
			}
			if($key eq 'time'){
				$time = $val;
			}
			if($key eq 'references'){
				my @refs = split(/[\s,]+/, $val);
				# No --genome specified
				if(!defined $GENOME){
					die "\n### Error - The pipeline $pipeline requires a genome to be set with --genome or --ref. Exiting... ### \n\n";
				} else {
					foreach my $ref (@refs){
						if(!defined $REFERENCES{$ref}{$GENOME}{path}){
							print "\n### Error ###\nNo $ref path found for genome key \"$GENOME\".\n\nAvailable $ref paths:\n";
							foreach my $genome_key ( keys %{$REFERENCES{$ref}}){
				                if(defined($REFERENCES{$ref}{$genome_key}{path})){
				                    print "\tGenome: $genome_key, Path: $REFERENCES{$ref}{$genome_key}{path}\n";
				                }
				            }
							die "\n\nExiting...\n\n";
						}
					}
				}
			}
		}
	}
	
	# Check that we have what we need
	# Don't warn about module requesting more than was suggested by core cf - too verbose.
	my @errors;
	unless($cores =~ /^\d+$/){
		push (@errors, "No cores requested ('$cores') - setting to 1");
		$cores = 1;
	}
	if($cores > $TOTAL_CORES){
		push (@errors, "Too many cores requested ('$cores') - setting to $TOTAL_CORES");
		$cores = $TOTAL_CORES;
	}
	unless($mem =~ /^[\d\.]+[gmkb]*$/i){
		push (@errors, "No memory requested ('$mem') - setting to 1Gb");
		$mem = '1G';
	}
	if(CF::Helpers::human_readable_to_bytes($mem) > CF::Helpers::human_readable_to_bytes($TOTAL_MEM)){
		push (@errors, "Too much memory requested ('$mem') - setting to $TOTAL_MEM");
		$mem = CF::Helpers::human_readable_to_bytes($TOTAL_MEM);
	}
	unless(CF::Helpers::timestamp_to_minutes($time) > 0){
		push (@errors, "No time requested ('$time') - setting to 2 days");
		$time = '2-00:00:00';
	}
	if(scalar @errors > 0){
		warn "\nWarning: error parsing requirements for $module_fn\n  - ".join("\n  - ", @errors)."\n";
		warn "Requirments request command: $req_cmd\nResponse:\n".`$req_cmd 2>&1`."\n";
	}

	# Adjust time request by time multiplier
	if($TIME_MULTIPLIER != 1){
		$time = CF::Helpers::minutes_to_timestamp(CF::Helpers::timestamp_to_minutes($time) * $TIME_MULTIPLIER);
	}

	# Check that we haven't gone over the maximum time allowed
	if($JOB_TIMELIMIT &&
		CF::Helpers::timestamp_to_minutes($JOB_TIMELIMIT) > 0 &&
		CF::Helpers::timestamp_to_minutes($JOB_TIMELIMIT) < CF::Helpers::timestamp_to_minutes($time)) {
		$time = CF::Helpers::minutes_to_timestamp(CF::Helpers::timestamp_to_minutes($JOB_TIMELIMIT));
	}

    return ($cores, $mem, $time);
}

sub make_qsubs {
	# Set up parameters for this branch
	my ($mod_tree, $prev_job, $runfn, $outfn) = @_;

	# Run through leaves
	foreach ( keys %{$mod_tree} ){

		# Set up parameters for these leaves
		my ($module, $parameter_string) = split(/\s/, $_, 2);
		my $job_id = $jid_base.$module.'_'.sprintf("%03d", rand(999));

		next unless(length($module) > 0);
		my $parameters = '';
		if($parameter_string){
			my @params = split(/\s/, $parameter_string);
			$parameters = "--param ".join(' --param ', @params);
		}

        # Get module filename and requirements
        my $module_fn = get_mod_fn($module);
        my ($cores, $mem, $time) = get_mod_requirements($module_fn, $runfn);

    	# Strip number from download job ID so that these
        # files are all read in the next module
    	(my $prev_job_cmd = $prev_job) =~ s/download_[\d]{3}$/download/;

		# formulate qsub command
		my @holdids = ($prev_job);
		my $cmd = "$module_fn --run_fn $runfn --job_id $job_id --prev_job_id $prev_job_cmd --cores $cores --mem $mem $parameters";
		my $qsub = make_submit_cmd($cmd, $job_id, \@holdids, $prev_job, $cores, $mem, $time, $outfn);

		push @qsubs, $qsub;
		push @job_ids, $job_id;
		unless(defined $run_job_ids{$runfn}){
			$run_job_ids{$runfn} = ();
		}
		push @{$run_job_ids{$runfn}}, $job_id;

		# Recursively call this function if we're not at the end of a branch
		if ( ref $mod_tree->{$_} eq 'HASH') {
			make_qsubs ( $mod_tree->{$_}, $job_id, $runfn, $outfn ) ;
		}
	}
}

sub make_submit_cmd {
	# Get the required variables to assemble this job submission
	# $holdjid should be an array reference
	my ($cmd, $job_id, $holdjid, $prev_job, $cores, $mem, $time, $outfn, $noprefix) = @_;

	# Make the command prefix all STDOUT and STDERR with the Job ID for parsing
	unless(defined($noprefix) && $noprefix){
		$cmd = "$cmd 2>&1 | sed s/^/###CF_$job_id:/";
	}

  # Divide memory by number of CPUs if configured
  if( $CLUSTER_MEM_PER_CPU ){
    $mem = CF::Helpers::human_readable_to_bytes($mem) / $cores;
    $mem = CF::Helpers::bytes_to_human_readable($mem);
  }

	my $qsub = "";

	#########################
	# Build command for LSF #
	#########################
	if($CLUSTER_ENVIRONMENT =~ /LSF/i){
		# Build command string
		if($CUSTOM_JOB_SUBMIT_COMMAND && length($CUSTOM_JOB_SUBMIT_COMMAND) > 0){
			$qsub = $CUSTOM_JOB_SUBMIT_COMMAND;
		} else {
			$qsub = 'bsub -n {{cores}} -M {{mem}} -R "rusage[mem={{mem}}]" -o {{outfn}} -J {{job_id}} -N "{{command}}"';
			# Do we have a priority?
			if($PRIORITY && $PRIORITY >= 0){
				$qsub .= "-sp {{priority}} ";
			}
		}

		# Memory has to be specified in megabytes for LSF
		$mem = CF::Helpers::mem_return_mbs($mem);

		# Swap in real values
		$qsub =~ s/\{\{command}}/$cmd/g;
		$qsub =~ s/\{\{job_id}}/$job_id/g;
		$qsub =~ s/\{\{outfn}}/$outfn/g;
		$qsub =~ s/\{\{cores}}/$cores/g;
		$qsub =~ s/\{\{mem}}/$mem/g;
		$qsub =~ s/\{\{time}}/$time/g;
		$qsub =~ s/\{\{priority}}/$PRIORITY/g;
		$qsub =~ s/\{\{email}}/$EMAIL/g;
		$qsub =~ s/\{\{project}}/$C_PROJECT/g;

		# Job dependencies
		if(scalar @{$holdjid} > 0 && $prev_job ne 'start_000'){
			$qsub .= "-w 'done(\"" . join('") && done("', @{$holdjid}) . "\")' ";
		}

	###########################
	# Build command for SLURM #
	###########################
	} elsif($CLUSTER_ENVIRONMENT =~ /SLURM/i){

		# Build command string
		if($CUSTOM_JOB_SUBMIT_COMMAND && length($CUSTOM_JOB_SUBMIT_COMMAND) > 0){
			$qsub = $CUSTOM_JOB_SUBMIT_COMMAND;
		} else {
			$qsub = 'sbatch -p core -n {{cores}} --mem {{mem}} --open-mode=append -o {{outfn}} -J {{job_id}} {{notifications}}';
			# Add in flags if we have them
			if($PRIORITY && $PRIORITY >= 0){
				$qsub .= " --priority {{priority}}";
			}
			if($C_PROJECT){
				$qsub .= " -A {{project}}";
			}
			if($time){
				$qsub .= " -t {{time}}";
			}
		}

    # Add the actual command
		$qsub .= ' --wrap="{{command}}"';


		# Work out notification settings
		my $notification_string = "";
		if(defined($EMAIL) && length($EMAIL) > 0 && @NOTIFICATIONS){
			$notification_string .= ' --mail-user={{email}} ';
			if((grep $_ eq 'suspend', @NOTIFICATIONS) or (grep $_ eq 'abort', @NOTIFICATIONS)){
				$notification_string .= '--mail-type=FAIL ';
			}
			if(grep $_ eq 'end', @NOTIFICATIONS){
				$notification_string .= '--mail-type=END ';
			}
		}

		# Get memory in MB
		# $mem = int(CF::Helpers::human_readable_to_bytes($mem)/1000000);

		# Swap in real values
		$qsub =~ s/\{\{command}}/$cmd/g;
		$qsub =~ s/\{\{job_id}}/$job_id/g;
		$qsub =~ s/\{\{outfn}}/$outfn/g;
		$qsub =~ s/\{\{cores}}/$cores/g;
		$qsub =~ s/\{\{mem}}/$mem/g;
		$qsub =~ s/\{\{time}}/$time/g;
		$qsub =~ s/\{\{priority}}/$PRIORITY/g;
		$qsub =~ s/\{\{notifications}}/$notification_string/g;
		$qsub =~ s/\{\{email}}/$EMAIL/g;
		$qsub =~ s/\{\{project}}/$C_PROJECT/g;

		# Job dependencies
		# This is horrible on SLURM (depends on capturing job submission STDOUT)
		if(scalar @{$holdjid} > 0 && $prev_job ne 'start_000'){
			my @jidarray = ();
			foreach my $jname (@{$holdjid}){
				if(exists $JOB_NUM_IDS{$jname}){
					push(@jidarray, $JOB_NUM_IDS{$jname});
				} elsif(!$cl_dryrun) {
					print "\nCouldn't find numeric job ID! '$job_id' needs ID from '$jname'\n";
				}
			}
			if(scalar @jidarray > 0){
				$qsub .= " --dependency=afterany:" . join(':', @jidarray);
			}
		}

		# Submit Job and capture ID
		unless($cl_dryrun) {
			my $job_submit = `$qsub 2>&1`;
			chomp($job_submit);
            print SUBMIT $job_submit."\n";
			if($job_submit =~ /Submitted batch job (\d+)/){
				$JOB_NUM_IDS{$job_id} = $1;
			} else {
				print "\nERROR! Couldn't find job id for $job_id: $job_submit\n";
			}
		}


	#################################
	# Build command for GRID Engine #
	#################################
	} elsif($CLUSTER_ENVIRONMENT =~ /GRIDEngine/i){

		# Build command string
		if($CUSTOM_JOB_SUBMIT_COMMAND && length($CUSTOM_JOB_SUBMIT_COMMAND) > 0){
			$qsub = $CUSTOM_JOB_SUBMIT_COMMAND;
		} else {
			$qsub = 'echo "{{command}}" | qsub -clear -b n -cwd -V -S /bin/bash -pe {{pe_env}} {{cores}} {{qname}} -l h_vmem={{mem}} -o {{outfn}} -j y -N {{job_id}} {{notifications}}';
			# Do we have a priority?
			if($PRIORITY && $PRIORITY <= 0){
				$qsub .= " -p {{priority}}";
			}
		}

		# Work out qsub notification settings
		my $notification_string = "";
		if(defined($EMAIL) && length($EMAIL) > 0 && @NOTIFICATIONS){
			my $count = 0;
			$notification_string = "-M {{email}} -m ";
			foreach my $not (@NOTIFICATIONS){
				if($not eq 'suspend'){
					$notification_string .= 's';
				}
				if($not eq 'end'){
					$notification_string .= 'e';
				}
				if($not eq 'abort'){
					$notification_string .= 'a';
				}
			}
		}

		my $qname ='';
		if ($cl_qname) {
			$qname = "-q $cl_qname ";
		}
		# Swap in real values
		$qsub =~ s/\{\{command}}/$cmd/g;
		$qsub =~ s/\{\{job_id}}/$job_id/g;
		$qsub =~ s/\{\{outfn}}/$outfn/g;
		$qsub =~ s/\{\{cores}}/$cores/g;
		$qsub =~ s/\{\{qname}}/$qname/g;
		$qsub =~ s/\{\{mem}}/$mem/g;
		$qsub =~ s/\{\{time}}/$time/g;
		$qsub =~ s/\{\{priority}}/$PRIORITY/g;
		$qsub =~ s/\{\{notifications}}/$notification_string/g;
		$qsub =~ s/\{\{email}}/$EMAIL/g;
		$qsub =~ s/\{\{project}}/$C_PROJECT/g;
		$qsub =~ s/\{\{pe_env}}/$JOB_SUBMIT_ENV/g;

		# Job dependencies
		if(scalar @{$holdjid} > 0 && $prev_job ne 'start_000'){
			$qsub .= " -hold_jid ". join(',', @{$holdjid});
		}


	###########################
	# Build command for local #
	###########################
	} elsif($CLUSTER_ENVIRONMENT =~ /local/i){
		# Keepin' it simple..
		$qsub = "$cmd >> $outfn\n";



	###########################################
	# Unrecognised Cluster Environment String #
	###########################################
	} else {
		die ("ERROR - Cluster Environment config value not recognised: $CLUSTER_ENVIRONMENT\n\n");
	}

	return $qsub;

}


# Print qsub jobs to the terminal or submit to cluster
if($cl_dryrun){
	print "\n\n".('-' x 46)."\n Jobs that would be submitted to the cluster:\n".('-' x 46)."\n\n" if $cl_dryrun;
	print "\n\n".('-' x 46)."\n Jobs submitted to the cluster:\n".('-' x 46)."\n\n" if !$cl_dryrun;
	foreach(@qsubs){
		print "$_\n\n";
	}
	if($CLUSTER_ENVIRONMENT =~ /SLURM/i){
		print "\n\n".('-' x 86)."\n Warning - Jobs do not have dependencies (SLURM jobs have to be launched to do this)\n".('-' x 86)."\n\n";
	}
}

# Run jobs
else {
    # SLURM jobs are already sent, during command building
    if($CLUSTER_ENVIRONMENT =~ /SLURM/i){
        print "\nJobs submitted.\n\n";
		print SUBMIT "\n\nTotal number of Cluster Flow jobs submitted: ".(scalar @qsubs)."\n";
        # Print actual commands to the submission log file
        print SUBMIT "\n\n\n\n\nCluster Flow job submission commands:\n".("="x37)."\n\n";
      	foreach(@qsubs){
      		print SUBMIT "$_\n\n";
      	}
    }
    # Run local jobs
    elsif($CLUSTER_ENVIRONMENT =~ /local/i){
    	# Write commands to a bash file
    	my $bash_fn = "cf_local_".$jid_base."commands.sh";
    	open (BASHOUT,'>',$bash_fn) or die "Can't write to $bash_fn: $!";
    	my $date = strftime "%H:%M, %d-%m-%Y", localtime;
        print BASHOUT "#!/bin/bash\n\n# Cluster Flow local script\n# Pipeline: $pipeline\n# Created at $date\n\n";
    	foreach(@qsubs){
    		print BASHOUT "$_\n\n";
    	}
    	close(BASHOUT);
    	# Execute the bash file in a single background job
    	system "nohup bash $bash_fn > /dev/null &";
    	# Tell the user.
    	print "\nRunning in Local mode..\nWritten commands to ${bash_fn} and submitted a background nohup job.\n\n";
    }
    # Kick off everything else
    else {
    	foreach(@qsubs){
          my $submit_text = `$_`;
          chomp($submit_text);
          print SUBMIT "$submit_text\n";
    	}
		print SUBMIT "\n\nTotal number of Cluster Flow jobs submitted: ".(scalar @qsubs)."\n";
		# Print actual commands to the submission log file
		print SUBMIT "\n\n\n\n\nCluster Flow job submission commands:\n".("="x37)."\n\n";
    	foreach(@qsubs){
    		print SUBMIT "$_\n\n";
    	}
    }
}

# Print information about the current environment
print SUBMIT "\n\n\nCurrent Environment Information:\n".('='x32)."\n\n";
if($CF_MODULES) {
	my $mod_list = `$modulecmd sh list 2>&1`;
	if($mod_list){
		print SUBMIT "== Environment Modules Loaded ==\n$mod_list\n";
	}
}
print SUBMIT "== Current PATH ==\n".join("\n", split(":", `echo \$PATH 2>&1`))."\n";
print SUBMIT "== Logged-in User ==\n".`whoami 2>&1`."\n";
print SUBMIT "== System Information ==\n".`uname -a 2>&1`."\n\n";

close (SUBMIT);
